## ----setup, include=FALSE------------------------------------------------
knitr::opts_chunk$set(echo = FALSE, results = "asis", warning = FALSE, message = FALSE, error = FALSE)

## ------------------------------------------------------------------------
rm(list = ls())

# Install required packages
pkg <- c("estimatr","stargazer","tidyverse","foreign","effects",
         "gridExtra","scales","grid","ggrepel","lme4","AER",
         "lme4","plm","Hmisc","sjPlot")
inst <- pkg %in% installed.packages()
if (length(pkg[!inst]) > 0) install.packages(pkg[!inst])
rm(pkg, inst)

library(estimatr)
library(stargazer)
library(tidyverse)
library(foreign)
library(effects)
library(gridExtra)
library(scales)
library(grid)
library(ggrepel)
library(AER)
library(lme4)
library(plm)
library(tidyverse)

# Set Working Directory to wherever files are downloaded
# setwd("")

## ----importdata----------------------------------------------------------
#Read cross-national data

cross_national_ffs <- read.csv("cross_national_ffs_final.csv") %>% 
  dplyr::select(-1)

#Read annual-level panel

annualpanel_ffs <- read.csv("annualpanel_ffs_final.csv") %>%
  dplyr::select(-1)

#Read monthly-level data

monthlypanel_ffs <- read.csv("monthlypanel_ffs.csv") %>%
  dplyr:: select(-1)

monthlypanel_ffs$date <- as.Date(paste("01-",monthlypanel_ffs$year_month,sep=""),"%d-%b-%y")
monthlypanel_ffs$benchmark_2015_adj <- monthlypanel_ffs$price_usd_2015 - monthlypanel_ffs$bmgap2015adj

# Loading annual gasoline consumption variable
load("consdata.RData")
consdata <- consdata[,c("wb_code","year","conspct","gasolinecons")]

## ----FIG1, fig.width=13, fig.height=9------------------------------------
#------------------------------------------------------------------------------#
#   Figure 1                                                                   #
#   Gasoline prices by country, 2003-15                                        #
#------------------------------------------------------------------------------#

# Prices over time by country updated benchmark
#   Congo Dem Rep is excluded for visual purposes 
#   (high spike in 2001 due to changes in Xrate)

fig1 <- ggplot(monthlypanel_ffs[monthlypanel_ffs$country2!="ZAR",],aes(x=date,y=price_usd_2015)) + 
  geom_line(aes(group=country),col="darkgray", size = 0.25) + 
  geom_line(data=monthlypanel_ffs[monthlypanel_ffs$country2=="USA",],
            aes(x=date,y=benchmark_2015_adj), size=0.5, col="black") + 
  scale_x_date(breaks = date_breaks("1 year"),
               minor_breaks=date_breaks("6 months"),
               labels = date_format("%Y"),
               limits=c(as.Date("2003-01-01"),as.Date("2015-10-01"))) + 
  scale_y_continuous(breaks=seq(0,3.25,.5), 
                     labels=c("0.00","0.50","1.00","1.50","2.00","2.50","$3.00")) + 
  labs(x="\n Year",y="Gasoline price (constant 2015 USD per liter) \n") + 
  coord_cartesian(ylim=c(-0.05,3.25)) +
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.line.x = element_line(colour = 'black', size=0.5, linetype='solid'), 
        axis.line.y = element_line(colour = 'black', size=0.5, linetype='solid'), 
        axis.text.x = element_text(size=8), 
        axis.text.y = element_text(size=8), 
        title = element_text(size=9))

# Figure 1 variations
## Defining oil exporters as >30% of total exports
oilexporters <- as.character(na.omit(unique(cross_national_ffs$country2[cross_national_ffs$meanfuelexports>30])))
monthlypanel_ffs$oilexporters <- 1*(monthlypanel_ffs$country2 %in% oilexporters)
avg.oilexporters <- summarize(group_by(monthlypanel_ffs,date,oilexporters),
                              meanprice = mean(price_usd_2015, na.rm = T))


## Defining income by tercile (top, middle, low)
lowgdp <- quantile(cross_national_ffs$meangdppc, probs = c(1/3), na.rm = T)
topgdp <- quantile(cross_national_ffs$meangdppc, probs = c(2/3), na.rm = T)

richcountries <- as.character(na.omit(unique(cross_national_ffs$country2[cross_national_ffs$meangdppc>=topgdp])))
poorcountries <- as.character(na.omit(unique(cross_national_ffs$country2[cross_national_ffs$meangdppc<lowgdp])))

monthlypanel_ffs$richcountries <- ifelse(monthlypanel_ffs$country2 %in% richcountries, 1,
                                         ifelse(monthlypanel_ffs$country2 %in% poorcountries, 3, 2))
avg.richcountries <- summarize(group_by(monthlypanel_ffs,date,richcountries),
                              meanprice = mean(price_usd_2015, na.rm = T))


## Defining democracy using Polity binary
democracies <- as.character(na.omit(unique(cross_national_ffs$country2[cross_national_ffs$democracy_polity == 1])))
monthlypanel_ffs$democracies <- 1*(monthlypanel_ffs$country2 %in% democracies)
avg.democracies <- summarize(group_by(monthlypanel_ffs,date,democracies),
                              meanprice = mean(price_usd_2015, na.rm = T))


# Rich-poor plot
fig1b <- ggplot(monthlypanel_ffs[monthlypanel_ffs$country2!="ZAR",],
       aes(x=date,y=price_usd_2015, col = factor(richcountries))) + 
  geom_line(aes(group=country), size = 0.25, alpha = 0.25) + 
  scale_color_manual(values = c("#6699CC","#CC6677","#117733"),
                     labels = c("High income","Middle income","Low income"),
                     name = "") +
  geom_line(data=monthlypanel_ffs[monthlypanel_ffs$country2=="USA",],
            aes(x=date,y=benchmark_2015_adj), size=0.5, col="black") + 
  geom_line(data=avg.richcountries,
            aes(x=date,y=meanprice,col=factor(richcountries)), size=1.5) +
  scale_x_date(breaks = date_breaks("1 year"),
               minor_breaks=date_breaks("6 months"),
               labels = date_format("%Y"),
               limits=c(as.Date("2003-01-01"),as.Date("2015-10-01"))) + 
  scale_y_continuous(breaks=seq(0,3.25,.5), 
                     labels=c("0.00","0.50","1.00","1.50","2.00","2.50","$3.00")) + 
  labs(x="\n Year",y="Gasoline price (constant 2015 USD per liter) \n") + 
  coord_cartesian(ylim=c(-0.05,3.25)) +
  theme(legend.position = c(0.15, 0.9),
        legend.key = element_rect(fill = NA),
        panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.line.x = element_line(colour = 'black', size=0.5, linetype='solid'), 
        axis.line.y = element_line(colour = 'black', size=0.5, linetype='solid'), 
        axis.text.x = element_text(size=8), 
        axis.text.y = element_text(size=8), 
        title = element_text(size=9))


# exporter-importer plot
fig1c <- ggplot(monthlypanel_ffs[monthlypanel_ffs$country2!="ZAR",],
       aes(x=date,y=price_usd_2015, col = factor(oilexporters))) + 
  geom_line(aes(group=country), size = 0.25, alpha = 0.25) + 
  scale_color_manual(values = c("#6699CC","#CC6677"),
                     labels = c("Oil importers","Oil exporters"),
                     name = "") + 
  geom_line(data=monthlypanel_ffs[monthlypanel_ffs$country2=="USA",],
            aes(x=date,y=benchmark_2015_adj), size=0.5, col="black") + 
  geom_line(data=avg.oilexporters,
            aes(x=date,y=meanprice,col=factor(oilexporters)), size=1.5) +
  scale_x_date(breaks = date_breaks("1 year"),
               minor_breaks=date_breaks("6 months"),
               labels = date_format("%Y"),
               limits=c(as.Date("2003-01-01"),as.Date("2015-10-01"))) + 
  scale_y_continuous(breaks=seq(0,3.25,.5), 
                     labels=c("0.00","0.50","1.00","1.50","2.00","2.50","$3.00")) + 
  labs(x="\n Year",y="Gasoline price (constant 2015 USD per liter) \n") + 
  coord_cartesian(ylim=c(-0.05,3.25)) +
  theme(legend.position = c(0.15, 0.9),
        legend.key = element_rect(fill = NA),
        panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.line.x = element_line(colour = 'black', size=0.5, linetype='solid'), 
        axis.line.y = element_line(colour = 'black', size=0.5, linetype='solid'), 
        axis.text.x = element_text(size=8), 
        axis.text.y = element_text(size=8), 
        title = element_text(size=9))



# democracy-autocracy plot
monthlypanel_ffs$democracies <- factor(monthlypanel_ffs$democracies, 
                                       levels = c(1,0))
fig1d <- ggplot(monthlypanel_ffs[monthlypanel_ffs$country2!="ZAR",],
       aes(x=date,y=price_usd_2015, col = factor(democracies))) + 
  geom_line(aes(group=country), size = 0.25, alpha = 0.25) + 
  geom_line(data=monthlypanel_ffs[monthlypanel_ffs$country2=="USA",],
            aes(x=date,y=benchmark_2015_adj), size=0.5, col="black") + 
  geom_line(data=avg.democracies,
            aes(x=date,y=meanprice,col=factor(democracies)), size=1.5) +
  # scale_linetype_manual(values = c("solid","dashed"),
  #                       labels = c("Democracies","Non-democracies"),
  #                       name = "") +  
  scale_color_manual(values = c("#6699CC","#CC6677"),
                     labels = c("Democracies","Non-democracies"),
                     name = "") + 
  scale_x_date(breaks = date_breaks("1 year"),
               minor_breaks=date_breaks("6 months"),
               labels = date_format("%Y"),
               limits=c(as.Date("2003-01-01"),as.Date("2015-10-01"))) + 
  scale_y_continuous(breaks=seq(0,3.25,.5), 
                     labels=c("0.00","0.50","1.00","1.50","2.00","2.50","$3.00")) + 
  labs(x="\n Year",y="Gasoline price (constant 2015 USD per liter) \n") + 
  coord_cartesian(ylim=c(-0.05,3.25)) +
  theme(legend.position = c(0.15, 0.9),
        legend.key = element_rect(fill = NA),
        panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.line.x = element_line(colour = 'black', size=0.5, linetype='solid'), 
        axis.line.y = element_line(colour = 'black', size=0.5, linetype='solid'), 
        axis.text.x = element_text(size=8), 
        axis.text.y = element_text(size=8), 
        title = element_text(size=9))

grid.arrange(fig1, fig1b, fig1c, fig1d, ncol = 2)

## ----FIG2, fig.width=6.5, fig.height=5-----------------------------------
#------------------------------------------------------------------------------#
#   Figure 2                                                                   #
#   Fuel dependence and net gasoline taxes by country                          #
#------------------------------------------------------------------------------#

ggplot(cross_national_ffs, 
       aes(x = fuel_income_dependence, y = meanbmgap2015adj)) + 
  geom_point(data = cross_national_ffs %>% filter(meanfuelexports <= 30), 
             aes(x = fuel_income_dependence, y = meanbmgap2015adj), alpha = 0.3) + 
  geom_point(data = cross_national_ffs %>% filter(meanfuelexports > 30), 
             aes(x = fuel_income_dependence, y = meanbmgap2015adj)) + 
  geom_smooth(data = cross_national_ffs, method = "lm",colour="black") +
  geom_text_repel(data = cross_national_ffs %>% filter(meanfuelexports > 30), 
                  aes(x = fuel_income_dependence, y = meanbmgap2015adj, label = country), size = 2) + 
  labs(x = "\n Fossil Fuel Dependence (% of GDP)", 
       y = "Net gasoline tax \n (constant 2015 USD per liter) \n") +
  theme(legend.position="none",
        panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.line.x = element_line(colour = 'black', size=0.5, linetype = 'solid'), 
        axis.line.y = element_line(colour = 'black', size=0.5, linetype = 'solid'), 
        axis.text.x = element_text(size=8), 
        axis.text.y = element_text(size=8), 
        title = element_text(size=9))

## ----FIG3a, fig.width=6, fig.height=4------------------------------------
#------------------------------------------------------------------------------#
#   Figure 3                                                                   #
#   Income per capita and net gasoline taxes by country                        #
#------------------------------------------------------------------------------#

# Log gdp variable
cross_national_ffs$loggdp <- log(cross_national_ffs$meangdppcatlas)

# All major exporters, but fitted line excludes Norway
ggplot(cross_national_ffs[cross_national_ffs$meanfuelexports>30,], 
       aes(x = loggdp, y = meanbmgap2015adj, label = country2)) + 
  geom_point() + 
  geom_smooth(data = cross_national_ffs[cross_national_ffs$meanfuelexports>30 & cross_national_ffs$country2!="NOR",], colour="black") +
  labs(x = "\n GNI per capita (logged)", 
       y = "Net gasoline tax \n (constant 2015 USD per liter) \n") +
  theme(legend.position="none",
        panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.line.x = element_line(colour = 'black', size=0.5, linetype = 'solid'), 
        axis.line.y = element_line(colour = 'black', size=0.5, linetype = 'solid'), 
        axis.text.x = element_text(size=8), 
        axis.text.y = element_text(size=8), 
        title = element_text(size=9))

## ----FIG3b, fig.width=6, fig.height=4------------------------------------
ggplot(cross_national_ffs[cross_national_ffs$meanfuelexports<30,], 
       aes(x = loggdp, y = meanbmgap2015adj, label = country2)) + 
  geom_point() + 
  geom_smooth(data = cross_national_ffs[cross_national_ffs$meanfuelexports<30,],colour="black") +
  labs(x = "\n GNI per capita (logged)", 
       y = "Net gasoline tax \n (constant 2015 USD per liter) \n") +
  theme(legend.position="none",
        panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.line.x = element_line(colour = 'black', size=0.5, linetype = 'solid'), 
        axis.line.y = element_line(colour = 'black', size=0.5, linetype = 'solid'), 
        axis.text.x = element_text(size=8), 
        axis.text.y = element_text(size=8), 
        title = element_text(size=9))

## ----FIG4, fig.width=13, fig.height=11-----------------------------------
#------------------------------------------------------------------------------#
#   Figure 4                                                                   #
#   Gasoline taxes by country in 2003 and 2015                                 #
#------------------------------------------------------------------------------#

# Merging to bring consumption in
monthlypanel_ffs_cons <- merge(monthlypanel_ffs, consdata, by.x = c("country2","year"), by.y = c("wb_code","year"), all.x = T, all.y = F)

# Merging to get VAT from annual dataset
vatdata <- annualpanel_ffs[,c("country2","year","VAT.Rate")]
monthlypanel_ffs_cons <- merge(monthlypanel_ffs_cons, vatdata, by = c("country2","year"), all.x = T, all.y = F)


# VAT-adjusting
monthlypanel_ffs_cons$vat_gasoline <- monthlypanel_ffs_cons$price_usd_2015*(monthlypanel_ffs_cons$VAT.Rate/100)
monthlypanel_ffs_cons$net_bmgap_gasoline <- monthlypanel_ffs_cons$bmgap2015adj - monthlypanel_ffs_cons$vat_gasoline

# Creating bookends variable
# beginbk <- c("01-2003","02-2003","03-2003","04-2003","05-2003","06-2003")
beginbk <- as.Date("2003-01-01"):as.Date("2003-06-01")

# endbk <- c("01-2015","02-2015","03-2015","04-2015","05-2015","06-2015")
endbk <- as.Date("2015-01-01"):as.Date("2015-06-01")

monthlypanel_ffs_cons$bookends <- ifelse(monthlypanel_ffs_cons$date %in% 
                                           beginbk, "Begin", 
                                         ifelse(monthlypanel_ffs_cons$date %in% 
                                                  endbk, "End",""))

# Creating bmg plot comparison data
bmgplot <- data.frame(wb_code = monthlypanel_ffs_cons$country2[monthlypanel_ffs_cons$bookends=="Begin"], 
                      country.short = monthlypanel_ffs_cons$country[monthlypanel_ffs_cons$bookends=="Begin"], 
                      bmgap03 = monthlypanel_ffs_cons$bmgap2015adj[monthlypanel_ffs_cons$bookends=="Begin"],
                      bmgap15 = monthlypanel_ffs_cons$bmgap2015adj[monthlypanel_ffs_cons$bookends=="End"], 
                      netbmgap03 = monthlypanel_ffs_cons$net_bmgap_gasoline[monthlypanel_ffs_cons$bookends=="Begin"],
                      netbmgap15 = monthlypanel_ffs_cons$net_bmgap_gasoline[monthlypanel_ffs_cons$bookends=="End"], 
                      cons = monthlypanel_ffs_cons$gasolinecons[monthlypanel_ffs_cons$bookends=="End"])

bmgplot <- summarize(group_by(bmgplot,wb_code), 
                     bmgap03 = mean(bmgap03,na.rm=T), 
                     bmgap15 = mean(bmgap15,na.rm=T), 
                     netbmgap03 = mean(netbmgap03,na.rm=T), 
                     netbmgap15 = mean(netbmgap15,na.rm=T), 
                     cons = mean(cons,na.rm=T),
                     country.short = unique(country.short,na.rm=T))

# Net change between 1st half 2015 and 1st half 2003
bmgplot$change <- bmgplot$bmgap15-bmgplot$bmgap03
bmgplot$netchange <- bmgplot$netbmgap15-bmgplot$netbmgap03


# Coloring quadrants according to positive v negative net change
bmgplot$quadrant1 <- with(bmgplot,
                          ifelse(!is.na(netbmgap15)&!is.na(netbmgap03),
                                 ifelse(netchange>=0,1,2),
                                 NA))


# Quadrant colors
cbbPalette <- c("#0072B2", "#D55E00")

# 2003 vs 2015 benchmark gap comparison 
ggplot(bmgplot, aes(x=netbmgap03, y=netbmgap15, size=cons, label=country.short,
                    col = factor(quadrant1))) + 
  geom_hline(yintercept=0, linetype="longdash", col="gray") + 
  geom_vline(xintercept=0, linetype="longdash", col="gray") + 
  geom_abline(intercept = 0, slope = 1, col = "darkgray", linetype = "longdash", size = 1) + 
  geom_text_repel() + 
  scale_size(range=c(3,6.5),name="Gasoline consumption \n (thou. bbls per day)",
             breaks=c(1000,5000,10000),labels=c(1000,5000,10000)) + 
  scale_x_continuous(breaks=seq(-0.25,1.25,0.25)) + 
  scale_y_continuous(breaks=seq(-0.5,1.5,0.25)) + 
  scale_colour_manual(values=cbbPalette) +
  labs(y="Net gasoline tax in 1st half 2015 (constant 2015 USD per litre)",
       x="\n Net gasoline tax in 1st half 2003 (constant 2015 USD per litre)") + 
  coord_cartesian(ylim = c(-0.55, 1.25), xlim = c(-0.55, 1.25)) + 
  theme(legend.position=c(0.85,0.1),
        panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.line.x = element_line(colour = 'black', size=0.5, linetype='solid'), 
        axis.line.y = element_line(colour = 'black', size=0.5, linetype='solid'), 
        axis.text.x = element_text(size=16), 
        axis.text.y = element_text(size=16),
        axis.title.x = element_text(size=18), 
        axis.title.y = element_text(size=18)) +
  guides(col=FALSE)

## ----FIG5setup-----------------------------------------------------------
#------------------------------------------------------------------------------#
#   Figure 5                                                                   #
#   Coefficient plot                                                           #
#------------------------------------------------------------------------------#

# Cross-section
model3_controls <- lm(meanbmgap2015adj ~ log(meangdppcatlas) + I(log(meangdppcatlas)^2) + meanvat + fuel_income_dependence + autocracy_polity*fuel_income_dependence + meangoveffect + avg_gov_debt,  
                      data = cross_national_ffs)

# Cross-section IV
model3_instrument <- plm(meanbmgap2015adj ~ fuel_income_dependence  + log(meangdppcatlas) + I(log(meangdppcatlas)^2) + autocracy_polity*fuel_income_dependence + meangoveffect + avg_gov_debt + meanvat | .- fuel_income_dependence + lnendowment_1960_pc,  data = cross_national_ffs, inst.method = "bvk", index = "country2", model = "pooling")


# Annual panel Fixed Effects
model_annual_controls2 <- lm(meanbmgap2015adj ~ VAT.Rate + log(NY.GNP.PCAP.CD) + I(log(NY.GNP.PCAP.CD)^2) + fuel_income_dependence*autocracy_polity + goveffect + percent_gdp + as.factor(country2) + as.factor(year),  data = annualpanel_ffs)


# Getting coefficients and SEs

# Cross-section
x <- (coeftest(model3_controls, vcov = vcovHC(model3_controls, type="HC1")))

varnames <- c("7. GDP","6. GDP sq.","1. VAT","5. Fuel dependence","4. Democracy","2. Govt Effectiveness","3. Debt (pct GDP)","0. Fuel-Dem")

mod1 <- data.frame(mod = rep("Cross-section", 8), vars = varnames, coefs = x[2:nrow(x),1], se =  x[2:nrow(x),2])


# IV
x <- (coeftest(model3_instrument, vcov = vcovHC(model3_instrument, type="HC1")))

varnames.iv <- c("5. Fuel dependence","7. GDP","6. GDP sq.","4. Democracy","2. Govt Effectiveness","3. Debt (pct GDP)","1. VAT","0. Fuel-Dem")

mod2 <- data.frame(mod = rep("IV cross-section", 8),vars = varnames.iv, coefs = x[2:nrow(x),1], se =  x[2:nrow(x),2])


# Fixed effects
x <- (coeftest(model_annual_controls2, vcov = vcovHC(model_annual_controls2, type="HC1")))

varnames.fe <- c("1. VAT","7. GDP","6. GDP sq.","5. Fuel dependence","4. Democracy","2. Govt Effectiveness","3. Debt (pct GDP)","0. Fuel-Dem")

mod4 <- data.frame(mod = rep("Panel (fixed-effects)", 8), vars = varnames.fe, coefs = x[c(2:8,155),1], se =  x[c(2:8,155),2])


# Putting all together and making CIs
plotdata <- rbind(mod1, mod2, mod4)
rownames(plotdata) <- NULL
plotdata$ci.low <- plotdata$coefs - 1.96*plotdata$se
plotdata$ci.high <- plotdata$coefs + 1.96*plotdata$se

# Specify the width of your confidence intervals
interval1 <- -qnorm((1-0.9)/2)  # 90% multiplier
interval2 <- -qnorm((1-0.95)/2)  # 95% multiplier

## ----FIG5a, fig.width = 8, fig.height=4----------------------------------
## Coefficient plots -- all four are printed separately and combined in LaTeX

# Plot of GDP separately

ggplot(plotdata %>% filter(vars %in% c("7. GDP", "6. GDP sq.")), 
       aes(colour = factor(mod), shape = factor(mod))) + 
  geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
  geom_linerange(aes(x = vars, ymin = coefs - se*interval1,
                     ymax = coefs + se*interval1),
                 lwd = 1, position = position_dodge(width = 1/2)) +
  geom_pointrange(aes(x = vars, y = coefs, ymin = coefs - se*interval2,
                      ymax = coefs + se*interval2),
                  lwd = 1/2, position = position_dodge(width = 1/2),
                  fill = "WHITE") +
  scale_x_discrete(labels = c("GNI per capita", "GNI per capita sq."), name = "") +
  scale_y_continuous(name = "") +
  scale_color_grey(name = "Model specification") +
  scale_shape_manual(name = "Model specification",
                     values = c(19, 23, 25)) +
  theme_bw() +
  theme(axis.text = element_text(size=14), 
        axis.title = element_text(size = 14),
        text = element_text(size=14, family = "Times"),
        legend.position = "top",
        legend.title = element_text(size=10),
        legend.text = element_text(size=8)) + 
  coord_flip(ylim = c(-1.5,0.75))

## ----FIG5b, fig.width = 8, fig.height=2----------------------------------
# Plot of debt separately 

ggplot(plotdata %>% filter(vars == "3. Debt (pct GDP)"), 
       aes(colour = factor(mod), shape = factor(mod))) + 
  geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
  geom_linerange(aes(x = vars, ymin = coefs - se*interval1,
                     ymax = coefs + se*interval1),
                 lwd = 1, position = position_dodge(width = 1/2)) +
  geom_pointrange(aes(x = vars, y = coefs, ymin = coefs - se*interval2,
                      ymax = coefs + se*interval2),
                  lwd = 1/2, position = position_dodge(width = 1/2),
                  fill = "WHITE") +
  scale_x_discrete(labels = "Central \n        Government\n Debt", name = "") +
  scale_y_continuous(name = "") +
  scale_color_grey(name = "Model specification") +
  scale_shape_manual(name = "Model specification",
                     values = c(19, 23, 25)) +
  theme_bw() +
  theme(axis.text = element_text(size=14), 
        axis.title = element_text(size = 14),
        text = element_text(size=14, family = "Times"),
        legend.position = "none") + 
  coord_flip(ylim = c(-0.01,0.005))

## ----FIG5c, fig.width = 8, fig.height=3----------------------------------
# Plot of Fuel separately

ggplot(plotdata %>% filter(vars %in% c("5. Fuel dependence","0. Fuel-Dem")), 
       aes(colour = factor(mod), shape = factor(mod))) + 
  geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
  geom_linerange(aes(x = vars, ymin = coefs - se*interval1,
                     ymax = coefs + se*interval1),
                 lwd = 1, position = position_dodge(width = 1/2)) +
  geom_pointrange(aes(x = vars, y = coefs, ymin = coefs - se*interval2,
                      ymax = coefs + se*interval2),
                  lwd = 1/2, position = position_dodge(width = 1/2),
                  fill = "WHITE") +
  scale_x_discrete(labels = c("Fuel dependence\n x Autocracy","Fuel dependence"), name = "") +
  scale_y_continuous(name = "") +
  scale_color_grey(name = "Model specification") +
  scale_shape_manual(name = "Model specification",
                     values = c(19, 23, 25)) +
  theme_bw() +
  theme(axis.text = element_text(size=14), 
        axis.title = element_text(size = 14),
        text = element_text(size=14, family = "Times"),
        legend.position = "none") + 
  coord_flip(ylim = c(-0.06,0.031))

## ----FIG5d, fig.width = 8, fig.height=2----------------------------------
# Plot of democracy separately 

ggplot(plotdata %>% filter(vars == "4. Democracy"), 
       aes(colour = factor(mod), shape = factor(mod))) + 
  geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
  geom_linerange(aes(x = vars, ymin = coefs - se*interval1,
                     ymax = coefs + se*interval1),
                 lwd = 1, position = position_dodge(width = 1/2)) +
  geom_pointrange(aes(x = vars, y = coefs, ymin = coefs - se*interval2,
                      ymax = coefs + se*interval2),
                  lwd = 1/2, position = position_dodge(width = 1/2),
                  fill = "WHITE") +
  scale_x_discrete(labels = "          Autocracy", name = "") +
  scale_y_continuous(breaks = seq(-2,1,0.5), name = "Estimated coefficients with 90% and 95% CIs") +
  scale_color_grey(name = "Model specification") +
  scale_shape_manual(name = "Model specification",
                     values = c(19, 23, 25)) +
  theme_bw() +
  theme(axis.text = element_text(size=14), 
        axis.title = element_text(size = 14),
        text = element_text(size=14, family = "Times"),
        legend.position = "none") + 
  coord_flip(ylim = c(-1.5,0.7))

## ----FIG6, fig.width = 8.34, fig.height = 7.03---------------------------
#------------------------------------------------------------------------------#
#   Figure 6                                                                   #
#   Supply/demand curve graphic                                                #
#------------------------------------------------------------------------------#
demand <- as.data.frame(Hmisc::bezier(c(1, 3, 9),c(9, 3, 1)))

supply <- as.data.frame(Hmisc::bezier(x = c(1, 8, 9),y = c(1, 5, 9)))

curve_intersect <- function(curve1, curve2) {
  # Approximate the functional form of both curves
  curve1_f <- approxfun(curve1$x, curve1$y, rule = 2)
  curve2_f <- approxfun(curve2$x, curve2$y, rule = 2)
  
  # Calculate the intersection of curve 1 and curve 2 along the x-axis
  point_x <- uniroot(function(x) curve1_f(x) - curve2_f(x), 
                     c(min(curve1$x), max(curve1$x)))$root
  
  # Find where point_x is in curve 2
  point_y <- curve2_f(point_x)
  
  # All done!
  return(list(x = point_x, y = point_y))
}

intersection_xy <- curve_intersect(supply, demand)

intersection_xy_df <- intersection_xy %>% as_data_frame()

xint <- as.numeric(intersection_xy_df[1])
yint <- as.numeric(intersection_xy_df[2])

xint2 <- supply$x[round(supply$x,1)==2.7]
yint1 <- supply$y[supply$x == xint2]
yint2 <- demand$y[round(demand$x,1)==2.7]

plot.new()

plot(0:10, 0:10, type = 'n', xaxt="n", yaxt="n", ann=FALSE, xlab = "", ylab = "", bty = "L", xlim = c(1.4,10.5))

loc = par("usr")

lines(supply)
lines(demand)

polygon(c(demand$x,rev(supply$x)),c(demand$y,rev(supply$y)),col="gray90")

segments(x0 = xint, y0 = -1, x1 = xint, y1 = yint, lty = 2)
segments(x0 = 0, y0 = yint1, x1 = xint2, y1 = yint1, lty = 2)
segments(x0 = 0, y0 = yint2, x1 = xint2, y1 = yint2, lty = 2)
segments(x0 = xint2, y0 = -1, x1 = xint2, y1 = yint2, lty = 2)

axis(side = 1, at = c(xint, xint2), labels = c(expression(i[2]),expression(i[1])))
axis(side = 2, at = c(yint1, yint2), labels = c(expression(r[1]),expression(r[2])), las = 1)
text(loc[1], loc[4], "Fuel tax revenues", pos = 2, xpd = T)
text(9, loc[3], "Income", pos = 1, xpd = T)

text(x = 9, y = 9, labels = "Willingness\nto Pay\n(S)", pos = 4)
text(x = 9, y = 1, labels = "Government\nrevenue needs\n(D)", pos = 4)
text(x = 2, y = 4, label = "A", cex = 2)
text(x = 7, y = 4, label = "B", cex = 2)

box(which = "plot", bty = "l")

## ----FIG-S1, fig.width=6.5, fig.height=4.5-------------------------------
#------------------------------------------------------------------------------#
#   Figure S1                                                                  #
#   Implicit taxes and subsidies over time by importers/exporters              #
#------------------------------------------------------------------------------#

## Oil exporter status

oilex <- c("AGO","ARE","AZE","BHR","BRN","COG","COL","DZA","ECU","GAB","IRN",
           "IRQ","KAZ","KWT","LBY","NGA","NOR","OMN","QAT","RUS","SAU","SDN", 
           "SYR","TTO","VEN","YEM")

monthlypanel_ffs_cons$oil.exporter <- 1*(monthlypanel_ffs_cons$country2 %in% oilex)

exporters <- summarize(
  group_by(monthlypanel_ffs_cons, oil.exporter, date), 
  benchgap15adj = mean(bmgap2015adj,na.rm=T)
)

ggplot() + 
  geom_line(data = monthlypanel_ffs_cons, aes(x = date, y = bmgap2015adj, group = country2),
            col = "darkgray", size = 0.25) +
  geom_line(data = exporters, 
            aes(x = date, y = benchgap15adj, 
                group = oil.exporter, col = factor(oil.exporter)), 
            size = 0.75) + 
  annotate(geom = "text", x = as.Date("2004-06-01"), y = c(0.7,0.15), 
           label = c("Oil importers", "Oil exporters"), 
           col = c("#000066","#663300"), size = 3, fontface = "bold") +
  labs(x = "\n Year", 
       y = "Net gasoline tax \n (constant 2015 USD per liter) \n") + 
  scale_x_date(breaks = date_breaks("1 year"),
               minor_breaks = date_breaks("6 months"),
               labels = date_format("%Y"),
               limits = c(as.Date("2003-01-01"), as.Date("2015-10-01"))) + 
  scale_y_continuous(breaks = seq(-1,2,0.5),
                     labels = c("-1.00","-0.50","0.00","0.50","1.00","1.50",
                                "$2.00")) +
  scale_color_manual(values = c("#000066","#663300")) +
  geom_hline(yintercept = 0, linetype="dashed",size = 1, alpha = 0.5) + 
  theme(legend.position="none",
        panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.line.x = element_line(colour = 'black', size=0.5, linetype = 'solid'), 
        axis.line.y = element_line(colour = 'black', size=0.5, linetype = 'solid'), 
        axis.text.x = element_text(size=8), 
        axis.text.y = element_text(size=8), 
        title = element_text(size=9)) +
  coord_cartesian(ylim = c(-1,2.2))

## ----FIG-S2, fig.width=6.5, fig.height=5---------------------------------
# Income per capita and predicted gasoline taxes

figs2mod <- lm(meanbmgap2015adj ~ loggdp + I(loggdp^2) + avg_gov_debt + meanvat, data = cross_national_ffs)

eff_cf <- effect("loggdp", figs2mod, xlevels = 100)
eff_df <- data.frame(eff_cf)

ggplot(eff_df, 
       aes(x = loggdp, y = fit)) + 
  geom_smooth(aes(ymin = lower, ymax = upper), stat = "identity") +
  geom_rug(aes(x = loggdp, y = meanbmgap2015adj), data = cross_national_ffs, sides="b", size = 0.25) + 
  scale_x_continuous(breaks = seq(5,11,1)) + 
  scale_y_continuous(breaks = seq(0,1.5,0.5)) + 
  labs(x = "\n Income per capita (logged)", 
       y = "Predicted net gasoline tax \n (constant 2015 USD per liter) \n") +
  theme(legend.position="none",
        panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.line.x = element_line(colour = 'black', size=0.5, linetype = 'solid'), 
        axis.line.y = element_line(colour = 'black', size=0.5, linetype = 'solid'), 
        axis.text.x = element_text(size=8), 
        axis.text.y = element_text(size=8), 
        title = element_text(size=9)) +
  coord_cartesian(ylim = c(0, 1.5))

## ----FIG-S3, fig.width=6.5, fig.height=4.5-------------------------------
#------------------------------------------------------------------------------#
#   Figure S3                                                                  #
#   Implicit taxes and subsidies over time by income quartiles                 #
#------------------------------------------------------------------------------#

# By income quartile
cross_national_ffs$income_pctile_atlas <- percent_rank(cross_national_ffs$meangdppcatlas)
cross_national_ffs$income_pctile_pc <- percent_rank(cross_national_ffs$meangdppc)

x <- quantile(cross_national_ffs$meangdppcatlas, na.rm = T)
cross_national_ffs$income_quartile <- ifelse(cross_national_ffs$meangdppcatlas < x[2], "4th quartile",
                                             ifelse(cross_national_ffs$meangdppcatlas < x[3], "3rd quartile",
                                                    ifelse(cross_national_ffs$meangdppcatlas < x[4], "2nd quartile",
                                                           ifelse(cross_national_ffs$meangdppcatlas < x[5], "1st quartile", NA))))

rm(x)

quantile.income <- cross_national_ffs[,c("country2","income_quartile")]

monthlypanel_ffs_cons <- merge(monthlypanel_ffs_cons, quantile.income, by = "country2", all.x = T, all.y = F)

incomegroups <- summarize(
  group_by(monthlypanel_ffs_cons %>% filter(oil.exporter==F), income_quartile, date), 
  benchgap15adj = mean(bmgap2015adj,na.rm=T),
  netbenchgap15adj = mean(net_bmgap_gasoline,na.rm=T)
)

ggplot() + 
  geom_line(data = monthlypanel_ffs_cons %>% filter(oil.exporter==F), aes(x = date, y = bmgap2015adj, group = country2),
            col = "darkgray", size = 0.25) +
  geom_line(data = incomegroups %>% filter(!is.na(income_quartile)), 
            aes(x = date, y = benchgap15adj, 
                group = income_quartile, col = factor(income_quartile)), 
            size = 0.75) + 
  annotate(geom = "text", x = as.Date("2015-09-01"), y = c(0.894,0.55, 0.334, 0.48),
           label = c("1st", "2nd", "3rd", "4th"),
           col = c("#000000","#0072B2", "#D55E00","#009E73"), size = 2, fontface = "bold") +
  labs(x = "\n Year", 
       y = "Net gasoline tax \n (constant 2015 USD per liter) \n") + 
  scale_x_date(breaks = date_breaks("1 year"),
               minor_breaks = date_breaks("6 months"),
               labels = date_format("%Y"),
               limits = c(as.Date("2003-01-01"), as.Date("2015-10-01"))) + 
  scale_y_continuous(breaks = seq(-1,2,0.5),
                     labels = c("-1.00","-0.50","0.00","0.50","1.00","1.50",
                                "$2.00")) +
  scale_color_manual(values = c("#000000","#0072B2", "#D55E00", "#009E73")) +
  geom_hline(yintercept = 0, linetype="dashed",size = 1, alpha = 0.5) + 
  theme(legend.position="none",
        panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.line.x = element_line(colour = 'black', size=0.5, linetype = 'solid'), 
        axis.line.y = element_line(colour = 'black', size=0.5, linetype = 'solid'), 
        axis.text.x = element_text(size=8), 
        axis.text.y = element_text(size=8), 
        title = element_text(size=9)) +
  coord_cartesian(ylim = c(-1,2.2))

## ----FIG-S4, fig.width=8,fig.height=6------------------------------------
# Marginal effects figure

library(sjPlot)
sjPlot::plot_model(model3_controls, type = "eff", terms = c("fuel_income_dependence","autocracy_polity"),
           title = "", 
           axis.title = c("Fuel income dependence","Net gasoline tax \n (constant 2015 USD per liter) \n"),
           colors = "bw") +
  scale_linetype_discrete(name = "Regime type", breaks = c(0,1), labels = c("Democracy","Autocracy")) +
  scale_colour_manual(name = "Regime type", breaks = c(0,1), labels = c("Democracy","Autocracy"),
                        values = c("black","black")) +
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.line.x = element_line(colour = 'black', size=0.5, linetype='solid'), 
        axis.line.y = element_line(colour = 'black', size=0.5, linetype='solid'), 
        axis.text.x = element_text(size=8), 
        axis.text.y = element_text(size=8), 
        title = element_text(size=9),
        legend.position = c(0.8,0.9))

## ----FIG-S5, fig.width = 8, fig.height = 7-------------------------------
# 2003 vs 2015 benchmark gap comparison
# HIGHLIGHTING TOP 30 FUEL DEPENDENT COUNTRIES

topoil <- cross_national_ffs %>% arrange(desc(meanfuelexports)) %>% select(country2) 
top30oil <- topoil$country2[1:30]

bmgplot$top30oil <- ifelse(bmgplot$wb_code %in% top30oil, 1, 0.1)

cbbPalette <- c("#0072B2", "#D55E00")

bmgplot$quadrant2 <- with(bmgplot,
                          ifelse(!is.na(bmgap15)&!is.na(bmgap03),
                                 ifelse(change>=0,1,2),
                                 NA)
)

ggplot(bmgplot, aes(x=netbmgap03, y=netbmgap15, size=cons, label=country.short,
                    col = factor(quadrant2), alpha = top30oil)) + 
  geom_hline(yintercept=0, linetype="longdash", col="gray") + 
  geom_vline(xintercept=0, linetype="longdash", col="gray") + 
  geom_abline(intercept = 0, slope = 1, col = "darkgray", linetype = "longdash", size = 1) + 
  geom_text_repel() + 
  scale_size(range=c(3,6.5),name="Gasoline consumption \n (thou. bbls per day)",
             breaks=c(1000,5000,10000),labels=c(1000,5000,10000)) + 
  scale_x_continuous(breaks=seq(-0.25,1.25,0.25)) + 
  scale_y_continuous(breaks=seq(-0.5,1.5,0.25)) + 
  scale_colour_manual(values=cbbPalette) +
  scale_alpha(guide = 'none') +
  labs(y="Net gasoline tax in 1st half 2015 (constant 2015 USD per litre)",
       x="\n Net gasoline tax in 1st half 2003 (constant 2015 USD per litre)") + 
  coord_cartesian(ylim = c(-0.55, 1.25), xlim = c(-0.55, 1.25)) + 
  theme(legend.position=c(0.85,0.1),
        panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.line.x = element_line(colour = 'black', size=0.5, linetype='solid'), 
        axis.line.y = element_line(colour = 'black', size=0.5, linetype='solid'), 
        axis.text.x = element_text(size=16), 
        axis.text.y = element_text(size=16),
        axis.title.x = element_text(size=18), 
        axis.title.y = element_text(size=18)) +
  guides(col=FALSE)

## ----FIG-S6, fig.width = 8, fig.height = 7-------------------------------
# 2003 vs 2015 benchmark gap comparison
# HIGHLIGHTING TOP 10 DEMOCRACY SCORE CHANGERS (LOW AND HI)

top10dem.hi <- c("Nepal", "Kyrgyz Republic", "Gabon", "Guinea-Bissau", "Libya")
top10dem.lo <- c("Thailand", "Iran, Islamic Rep.", "Bangladesh", "Burundi", "Turkey")


bmgplot$top10dem <- ifelse(bmgplot$country.short %in% c(top10dem.hi,top10dem.lo), 1, 0.1)
bmgplot$top10dem.hi <- ifelse(bmgplot$country.short %in% c(top10dem.hi), 2, 
                              ifelse(bmgplot$country.short %in% c(top10dem.lo), 1, 3))

cbbPalette <- c("red", "blue", "#000000")

ggplot(bmgplot, aes(x=netbmgap03, y=netbmgap15, size=cons, label=country.short,
                    col = factor(top10dem.hi), alpha = top10dem)) + 
  geom_hline(yintercept=0, linetype="longdash", col="gray") + 
  geom_vline(xintercept=0, linetype="longdash", col="gray") + 
  geom_abline(intercept = 0, slope = 1, col = "darkgray", linetype = "longdash", size = 1) + 
  geom_text_repel() + 
  scale_size(range=c(3,6.5),name="Gasoline consumption \n (thou. bbls per day)",
             breaks=c(1000,5000,10000),labels=c(1000,5000,10000)) + 
  scale_x_continuous(breaks=seq(-0.25,1.25,0.25)) + 
  scale_y_continuous(breaks=seq(-0.5,1.5,0.25)) + 
  scale_colour_manual(values=cbbPalette) +
  scale_alpha(guide = 'none') +
  labs(y="Net gasoline tax in 1st half 2015 (constant 2015 USD per litre)",
       x="\n Net gasoline tax in 1st half 2003 (constant 2015 USD per litre)") + 
  coord_cartesian(ylim = c(-0.55, 1.25), xlim = c(-0.55, 1.25)) + 
  theme(legend.position=c(0.85,0.1),
        panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.line.x = element_line(colour = 'black', size=0.5, linetype='solid'), 
        axis.line.y = element_line(colour = 'black', size=0.5, linetype='solid'), 
        axis.text.x = element_text(size=16), 
        axis.text.y = element_text(size=16),
        axis.title.x = element_text(size=18), 
        axis.title.y = element_text(size=18)) +
  guides(col=FALSE)

## ----FIG-S7, fig.width=6, fig.height=6-----------------------------------
#------------------------------------------------------------------------------#
#   FIG S7
#   Within/across variation                                                    #
#   Assessing change over time in monthly data                                 #
#------------------------------------------------------------------------------#

monthly.stats.id <- summarize(group_by(monthlypanel_ffs, country2), 
                             nums = n(),
                             sigma2 = var(bmgap2015adj, na.rm = T),
                             sigma = sd(bmgap2015adj, na.rm = T),
                             mu = mean(bmgap2015adj, na.rm = T))

monthly.stats.yr <- summarize(group_by(monthlypanel_ffs, year_month), 
                              nums = n(),
                              sigma2 = var(bmgap2015adj, na.rm = T),
                              sigma = sd(bmgap2015adj, na.rm = T),
                              mu = mean(bmgap2015adj, na.rm = T))

# Comparison to monthly versus country means

monthlypanel_ffs_3 <- merge(monthlypanel_ffs, monthly.stats.id, by = "country2")
monthlypanel_ffs_3 <- merge(monthlypanel_ffs_3, monthly.stats.yr, by = "year_month")

monthlypanel_ffs_3$bmg.id <- monthlypanel_ffs_3$bmgap2015adj - monthlypanel_ffs_3$mu.x
monthlypanel_ffs_3$bmg.yr <- monthlypanel_ffs_3$bmgap2015adj - monthlypanel_ffs_3$mu.y

monthlypanel_ffs_3 <- monthlypanel_ffs_3 %>% arrange(country2, date)

bmg.id.q <- quantile(monthlypanel_ffs_3$bmg.id, probs = c(0.025,0.975), na.rm = T)
bmg.yr.q <- quantile(monthlypanel_ffs_3$bmg.yr, probs = c(0.025,0.975), na.rm = T)


ggplot(monthlypanel_ffs_3, aes(x = bmg.id, y = bmg.yr)) + 
  geom_point(size = 0.75, alpha = 0.3) + 
  geom_vline(xintercept = bmg.id.q, linetype = "dashed", alpha = 0.5) +
  geom_hline(yintercept = bmg.yr.q, linetype = "dashed", alpha = 0.5) +
  annotate(geom = "text", x = c(-2,-2), y = bmg.yr.q, 
           label = c(paste("2.5% quantile = ", eval(round(bmg.yr.q[1], 2)), sep = ""),
                     paste("97.5% quantile = ", eval(round(bmg.yr.q[2], 2)), sep = "")),
           size = 3, hjust = 0, vjust = -1, alpha = 0.5) +
  annotate(geom = "text", y = c(-2.1,-2.1), x = bmg.id.q, 
           label = c(paste("2.5% quantile = ", eval(round(bmg.id.q[1], 2)), sep = ""),
                     paste("97.5% quantile = ", eval(round(bmg.id.q[2], 2)), sep = "")),
           size = 3, hjust = c(1.1, -0.1), vjust = c(0,0), alpha = 0.5) +
  scale_x_continuous(breaks = seq(-2,2,0.5)) +
  scale_y_continuous(breaks = seq(-2,2,0.5)) +
  labs(x = "Within-country mean deviation", 
       y = "Across-country mean deviation") +
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.line.x = element_line(colour = 'black', size=0.5, linetype='solid'), 
        axis.line.y = element_line(colour = 'black', size=0.5, linetype='solid'), 
        axis.text.x = element_text(size=8), 
        axis.text.y = element_text(size=8), 
        title = element_text(size=9)) +
  coord_cartesian(xlim = c(-2, 2), ylim = c(-2, 2))

